{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# Mount Google Drive into the Colab filesystem so the dataset files can be read\n",
        "from google.colab import drive\n",
        "\n",
        "drive.mount(mountpoint=\"/content/drive\")"
      ],
      "metadata": {
        "id": "kl1HFguXU8VJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "from scipy.stats import chi2\n",
        "import datetime as dt\n",
        "# Base folder on the mounted Drive; later cells read Table2.xlsx from it and write Table2_out.xlsx to it.\n",
        "# NOTE(review): the name `dir` shadows the Python builtin; kept as-is because later cells reference it.\n",
        "dir = '/content/drive/MyDrive/HFT/'\n"
      ],
      "metadata": {
        "id": "gO_27Mu2_I_h"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the input workbook and parse the YYYYMMDD-formatted DATE column into datetimes\n",
        "df = pd.read_excel(dir + 'Table2.xlsx').assign(\n",
        "    DATE=lambda frame: pd.to_datetime(frame['DATE'], format='%Y%m%d')\n",
        ")\n",
        "df.head()"
      ],
      "metadata": {
        "id": "S7ms6ctv_-C2"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# Table 2: mean returns, t-statistics and a joint chi-squared statistic per sample period.\n",
        "\n",
        "# Sample periods as inclusive (start, end) bounds in YYYYMM form\n",
        "sample_periods = {\n",
        "    \"193101-202012\": (193101, 202012),\n",
        "    \"193101-196012\": (193101, 196012),\n",
        "    \"196101-199012\": (196101, 199012),\n",
        "    \"199101-202012\": (199101, 202012),\n",
        "}\n",
        "\n",
        "# Identify return columns by their 'ret_t' prefix\n",
        "returns_columns = [col for col in df.columns if col.startswith('ret_t')]\n",
        "\n",
        "# YYYYMM key of every row, built once instead of re-formatting the dates twice per period.\n",
        "# Rows with a missing DATE become NaN here and therefore match no period.\n",
        "yyyymm = df['DATE'].dt.year * 100 + df['DATE'].dt.month\n",
        "\n",
        "# Period label -> flat vector of [means..., t-stats..., chi-squared]\n",
        "results = {}\n",
        "\n",
        "# Loop through each sample period\n",
        "for period, (start, end) in sample_periods.items():\n",
        "    # Rows whose month falls inside the period (both bounds inclusive)\n",
        "    period_returns = df.loc[yyyymm.between(start, end), returns_columns]\n",
        "\n",
        "    # Per-column moments; pandas skips missing values and std() is the sample std (ddof=1)\n",
        "    means = period_returns.mean()\n",
        "    std_devs = period_returns.std()\n",
        "    n_obs = period_returns.count()  # non-missing observations per column\n",
        "\n",
        "    # t-stat of H0: mean = 0. The ddof=1 std pairs with sqrt(n); sqrt(n - 1) is only\n",
        "    # correct with the ddof=0 std and would understate every t-stat by sqrt((n - 1) / n).\n",
        "    t_stats = np.sqrt(n_obs) * means / std_devs\n",
        "\n",
        "    # Joint test that all means are zero: n * mean' * inv(cov) * mean.\n",
        "    # Only rows with every return present are used, so n, the mean vector and the\n",
        "    # covariance matrix all describe the same sample.\n",
        "    complete_returns = period_returns.dropna()\n",
        "    mean_vector = complete_returns.mean().values\n",
        "    cov_matrix = complete_returns.cov().values\n",
        "    # Solve cov @ x = mean instead of forming the explicit inverse (numerically safer)\n",
        "    chi_squared_stat = len(complete_returns) * (mean_vector @ np.linalg.solve(cov_matrix, mean_vector))\n",
        "\n",
        "    # Store results\n",
        "    results[period] = np.concatenate([means.round(4).values, t_stats.round(2).values, [chi_squared_stat.round(2)]])\n",
        "\n",
        "# Create a summary DataFrame: one row per statistic, one column per sample period\n",
        "index_labels = (\n",
        "    [f\"{col}_Mean\" for col in returns_columns]\n",
        "    + [f\"{col}_T-Stat\" for col in returns_columns]\n",
        "    + [\"Chi-Squared\"]\n",
        ")\n",
        "\n",
        "summary_df = pd.DataFrame(results, index=index_labels)\n",
        "summary_df.to_excel(dir + 'Table2_out.xlsx')\n",
        "\n",
        "# Display the result as the cell's last expression (rich table instead of print)\n",
        "summary_df"
      ],
      "metadata": {
        "id": "Xb1vpvZvTGV0"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}